# -*- coding: utf-8 -*-
from scrapy.spider import CrawlSpider,Rule  
from scrapy.linkextractors import LinkExtractor  
from demo.items import MasterItem  

class CSDNSpider(CrawlSpider):
    """Crawl spider that records the URL of each matching edu.csdn.net page.

    Crawling begins at ``start_urls``. Links whose URL matches one of the
    patterns in ``rules`` are followed, and each matching response is handed
    to :meth:`parse_item`, which emits a ``MasterItem`` carrying that URL.
    """

    name = 'master'
    # NOTE: the print calls in this class body run once, when the class is
    # defined (i.e. when the module is imported), not when crawling starts.
    print("start urls begin")
    print("start urls begin")
    print("start urls begin")
    allowed_domains = ['edu.csdn.net']
    start_urls = ['https://edu.csdn.net/courses/o340_k']
    # Retained so existing references to ``CSDNSpider.item`` keep working.
    # parse_item no longer mutates this shared instance.
    item = MasterItem()
    print("rules urls begin")
    print("rules urls begin")
    print("rules urls begin")
    # Rule defines which links are extracted and what is done with them.
    rules = (
        Rule(
            LinkExtractor(allow=(
                'https://edu.csdn.net/courses/o340_k/p[0-9]+',
                'https://edu.csdn.net/course/detail/[0-9]+',
            )),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Build an item holding the URL of ``response``.

        Args:
            response: The response for a link matched by ``rules``.

        Returns:
            A new ``MasterItem`` whose ``url`` field is ``response.url``.
        """
        # Create a fresh item per response. Reusing one shared instance would
        # make every emitted item the same object, so a later response would
        # overwrite the url of an item that is still being processed.
        item = MasterItem()
        item['url'] = response.url
        return item
